{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from bs4 import BeautifulSoup\n",
    "localtext = open('5.html', 'r', encoding='utf-8')\n",
    "soup = BeautifulSoup(localtext, 'lxml')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<!DOCTYPE html>\n",
      "<html lang=\"en\">\n",
      "<head>\n",
      "<meta charset=\"utf-8\"/>\n",
      "<meta content=\"width=device-width, initial-scale=1.0\" name=\"viewport\"/>\n",
      "<meta content=\"ie=edge\" http-equiv=\"X-UA-Compatible\"/>\n",
      "<title>节点的概念</title>\n",
      "</head>\n",
      "<body>\n",
      "<div class=\"div\" id=\"divid\">\n",
      "<p class=\"p\">p标签</p>\n",
      "<a class=\"a\">a标签</a>\n",
      "</div>\n",
      "</body>\n",
      "</html>\n"
     ]
    }
   ],
   "source": [
    "print(soup)\n",
    "# print(soup.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<title>节点的概念</title>\n"
     ]
    }
   ],
   "source": [
    "print(soup.title)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<head>\n",
      "<meta charset=\"utf-8\"/>\n",
      "<meta content=\"width=device-width, initial-scale=1.0\" name=\"viewport\"/>\n",
      "<meta content=\"ie=edge\" http-equiv=\"X-UA-Compatible\"/>\n",
      "<title>节点的概念</title>\n",
      "</head>\n"
     ]
    }
   ],
   "source": [
    "print(soup.head)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "节点的概念\n"
     ]
    }
   ],
   "source": [
    "print(soup.title.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "p标签\n",
      "a标签\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(soup.div.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['div']\n"
     ]
    }
   ],
   "source": [
    "print(soup.div['class'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "divid\n"
     ]
    }
   ],
   "source": [
    "print(soup.div['id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<a class=\"a\">a标签</a>\n"
     ]
    }
   ],
   "source": [
    "print(soup.a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<a class=\"a\">a标签</a>\n"
     ]
    }
   ],
   "source": [
    "print(soup.div.a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a标签\n"
     ]
    }
   ],
   "source": [
    "print(soup.a.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['a']\n"
     ]
    }
   ],
   "source": [
    "print(soup.div.a['class'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## 获取属性，并指定默认值\n",
    "print(soup.title.get('class', False))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
